HOMEWORK 4 - 5

INTRODUCTION

The data set provided by Trendyol consists of 13 columns and 4331 rows. There are 9 products and for each there are 12 attributes for each day between 05-25-2020 and 31-05-2021.

Type of event_date transformed to “Date” and “Day”, “Month” and “Year” columns are added to the data set.

For each of 9 products a separate data set is constructed and observations ordered from old to new.

library(data.table)
library(lubridate)
library(ggplot2)
library(forecast)
library(urca)


setwd("C:/Users/Pınar YILDIRIM/Downloads")
#Data download
my_data= read.csv("ProjectRawData.csv")
my_data=as.data.table(my_data)

my_data$event_date=ymd(my_data$event_date)
my_data[,"Day" := lubridate::wday(event_date, label=TRUE)]
my_data[,"Month" := lubridate::month(event_date, label=TRUE)]
my_data[,"Year" := lubridate::year(event_date)]


#PRODUCTS

###PRODUCT 1
product1= my_data[my_data$product_content_id==31515569,]
#order observations from old to new
product1=(product1[order(product1$event_date),])
my_data[product_content_id==31515569, product:="1"]


###PRODUCT 2
product2= my_data[my_data$product_content_id==32737302,]
#order observations from old to new
product2=(product2[order(product2$event_date),])
my_data[product_content_id==32737302, product:="2"]


###PRODUCT 3
product3= my_data[my_data$product_content_id==32939029,]
#order observations from old to new
product3=(product3[order(product3$event_date),])
my_data[product_content_id==32939029, product:="3"]



###PRODUCT 4
product4= my_data[my_data$product_content_id==4066298,]
#order observations from old to new
product4=(product4[order(product4$event_date),])
my_data[product_content_id==4066298, product:="4"]


###PRODUCT 5
product5= my_data[my_data$product_content_id==48740784,]
#order observations from old to new
product5=(product5[order(product5$event_date),])
my_data[product_content_id==48740784, product:="5"]



###PRODUCT 6
product6= my_data[my_data$product_content_id==6676673,]
#order observations from old to new
product6=(product6[order(product6$event_date),])
my_data[product_content_id==6676673, product:="6"]


###PRODUCT 7
product7= my_data[my_data$product_content_id==7061886,]
#order observations from old to new
product7=(product7[order(product7$event_date),])
my_data[product_content_id==7061886, product:="7"]



###PRODUCT 8
product8= my_data[my_data$product_content_id==73318567,]
#order observations from old to new
product8=(product8[order(product8$event_date),])
my_data[product_content_id==73318567, product:="8"]


###PRODUCT 9
product9= my_data[my_data$product_content_id==85004,]
#order observations from old to new
product9=(product9[order(product9$event_date),])
my_data[product_content_id==85004, product:="9"]


accu=function(actual,forecast){
  n=length(actual)
  error=actual-forecast
  error=complete.cases(error)
  mean=mean(actual)
  #sd=sd(actual)
  #CV=sd/mean
  FBias=sum(error)/sum(actual)
  MAPE=sum(abs(error/actual))/n
  RMSE=sqrt(sum(error^2)/n)
  MAD=sum(abs(error))/n
  MADP=sum(abs(error))/sum(abs(actual))
  WMAPE=MAD/mean
  l=data.frame(n,mean,FBias,MAPE,RMSE,MAD,MADP,WMAPE)
  return(l)
}

DECOMPOSITION

Product 1

ID of product 1 is “31515569” and it is a legging by “TRENDYOLMILLA”.

(ggplot(product1,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 1",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 1

The first plot shows the average of products sold for each weekday.

product1[,wday_mean := mean(sold_count), by=.(Day)]

(ggplot(product1,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.

product1[,wday_mean := mean(sold_count), by=.(Day)]

(ggplot(product1,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has different effects depending on the month and year.

product1[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product1,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))

Finally, when sold counts of each weekday is plotted for each year and month there is not a clear pattern as seen in the plot below.

pr1_wday=product1[,list(wday_mean), by=.(Day, Month, year(event_date))]
pr1_wday
##      Day Month year wday_mean
##   1: Pzt   May 2020    610.00
##   2: Sal   May 2020    437.00
##   3: Çar   May 2020    270.00
##   4: Per   May 2020    366.00
##   5: Cum   May 2020   1188.00
##  ---                         
## 368: Per   May 2021    417.25
## 369: Cum   May 2021    388.00
## 370: Cum   May 2021    388.00
## 371: Cum   May 2021    388.00
## 372: Cum   May 2021    388.00
(ggplot(pr1_wday,aes(x=Day,y=wday_mean))+
    geom_line(aes(group=factor(Month), color=Month))+
    facet_grid(rows=pr1_wday$year)+
    theme_minimal()+
    labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 1 is decomposed weekly and additively. There is somewhat a seasonality in Trend term and Random term fails to satisfy constant variance assumption for a period. Test- statistic of KPSS test is smaller than the critical values which indicates stationarity.

weekly_pr1  = ts(product1$sold_count, freq=7)
weekly_decomp_pr1_add = decompose(weekly_pr1)
plot(weekly_decomp_pr1_add)

test_stat <- ur.kpss(weekly_decomp_pr1_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0149 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

In multiplicative decomposition there is still a seasonality in Trend term but random term has smaller variance than the additive decomposition. KPSS test statistic is greater than the additive decomposition.

weekly_decomp_pr1_mult = decompose(weekly_pr1, type="multiplicative")
plot(weekly_decomp_pr1_mult)

test_stat <- ur.kpss(weekly_decomp_pr1_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.1564 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 1

The first plot shows the average of products sold for each day of month.

product1[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product1,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 1",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product1[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product1,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 1",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021 there a pattern is observed. On the 10th day sales are high and around 15th day sales drop.

pr1_day=product1[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr1_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 1",y="Average Sold Count by Days of Month", x="Event Date"))

Additive monthly decomposition does not have a seasonality in Trend term.

KPSS test has small test statistic

monthly_pr1  = ts(product1$sold_count, freq=30)
monthly_decomp_pr1 = decompose(monthly_pr1)
plot(monthly_decomp_pr1)

test_stat <- ur.kpss(monthly_decomp_pr1$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0177 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr1_mult = decompose(monthly_pr1, type="multiplicative")
plot(monthly_decomp_pr1_mult)

test_stat <- ur.kpss(monthly_decomp_pr1_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0551 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

The smallest test statistics belongs to weekly additive decomposition for Product 1.

Product 2

ID of product 2 is “32737302” and it is a bikini top by “TRENDYOLMILLA”.

(ggplot(product2,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 2",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 2

The first plot shows the average of products sold for each weekday.

product2[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product2,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021. The change in mean is due to 2020 winter period where almost zero products are sold.

product2[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product2,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.

product2[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product2,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))

Finally, when sold counts of each weekday is plotted for each year and month there is a pattern pbserved in summer months.

pr2_wday=product2[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr2_wday,aes(x=Day,y=wday_mean))+
    geom_line(aes(group=factor(Month), color=Month))+
    facet_grid(rows=pr2_wday$year)+
    theme_minimal()+
    labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 2 is decomposed weekly and additively. There is somewhat a seasonality in Trend term and Random term fails to satisfy constant variance assumption.

weekly_pr2  = ts(product2$sold_count, freq=7)
weekly_decomp_pr2_add = decompose(weekly_pr2)
plot(weekly_decomp_pr2_add)

test_stat <- ur.kpss(weekly_decomp_pr2_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0319 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

In multiplicative decomposition there is still a seasonality in Trend term but random term has smaller variance than the additive decomposition.

weekly_decomp_pr2_mult = decompose(weekly_pr2, type="multiplicative")
plot(weekly_decomp_pr2_mult)

test_stat <- ur.kpss(weekly_decomp_pr2_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.1042 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 2

The first plot shows the average of products sold for each day of month.

product2[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product2,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 2",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021. Patterns are similar in both years.

product2[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product2,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 2",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021 the patterns are not similar. This observation may be due to the winter period of 2020.

pr2_day=product2[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr2_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 2",y="Average Sold Count by Days of Month", x="Event Date"))

Constant variance assumption of random term is violated.

monthly_pr2  = ts(product2$sold_count, freq=30)
monthly_decomp_pr2 = decompose(monthly_pr2)
plot(monthly_decomp_pr2)

test_stat <- ur.kpss(weekly_decomp_pr2_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.1277 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr2_mult = decompose(monthly_pr2, type="multiplicative")
plot(monthly_decomp_pr2_mult)

test_stat <- ur.kpss(weekly_decomp_pr2_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.1277 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive weekly decomposition is chosen for Product 2 since its test statistic is the smaller

Product 3

ID of product 3 is “32939029” and it is a chargable tooth brush by “Oral-B”.

(ggplot(product3,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 3",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 3

The first plot shows the average of products sold for each weekday.

product3[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product3,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.

product3[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product3,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.

product3[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product3,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))

Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed.

pr3_wday=product3[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr3_wday,aes(x=Day,y=wday_mean))+
    geom_line(aes(group=factor(Month), color=Month))+
    facet_grid(rows=pr3_wday$year)+
    theme_minimal()+
    labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 3 is decomposed weekly and additively. There is somewhat a seasonality in Trend term and Random term fails to satisfy constant variance assumption.

weekly_pr3  = ts(product3$sold_count, freq=7)
weekly_decomp_pr3_add = decompose(weekly_pr3)
plot(weekly_decomp_pr3_add)

test_stat <- ur.kpss(weekly_decomp_pr3_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.024 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

In multiplicative decomposition there is still a seasonality in Trend term but random term has smaller variance than the additive decomposition.

weekly_decomp_pr3_mult = decompose(weekly_pr3, type="multiplicative")
plot(weekly_decomp_pr3_mult)

test_stat <- ur.kpss(weekly_decomp_pr3_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.3276 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 3

The first plot shows the average of products sold for each day of month.

product3[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product3,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 3",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product3[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product3,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 3",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021 a pattern is observed in the plot below.

pr3_day=product3[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr3_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 3",y="Average Sold Count by Days of Month", x="Event Date"))

In additive decomposition constant variance assumption of random term is violated.

monthly_pr3  = ts(product3$sold_count, freq=30)
monthly_decomp_pr3 = decompose(monthly_pr3)
plot(monthly_decomp_pr3)

test_stat <- ur.kpss(monthly_decomp_pr3$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0687 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr3_mult = decompose(monthly_pr3, type="multiplicative")
plot(monthly_decomp_pr3_mult)

test_stat <- ur.kpss(monthly_decomp_pr3_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.1196 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive weekly decomposition is chosen for Product 3 since its test statistics is the smallest.

Product 4

ID of product 4 is “4066298” and it is a baby wet wipes by “Sleepy”.

(ggplot(product4,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 4",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 4

The first plot shows the average of products sold for each weekday.

product4[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product4,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.

product4[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product4,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.

product4[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product4,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))

Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed mostly in winter months.

pr4_wday=product4[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr4_wday,aes(x=Day,y=wday_mean))+
    geom_line(aes(group=factor(Month), color=Month))+
    facet_grid(rows=pr4_wday$year)+
    theme_minimal()+
    labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 4 is decomposed weekly and additively. There is somewhat a seasonality in Trend term and Random term fails to satisfy constant variance assumption.

weekly_pr4  = ts(product4$sold_count, freq=7)
weekly_decomp_pr4_add = decompose(weekly_pr4)
plot(weekly_decomp_pr4_add)

test_stat <- ur.kpss(weekly_decomp_pr4_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.014 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

In multiplicative decomposition there is still a seasonality in Trend term but random term has more constant variance than the additive decomposition.

weekly_decomp_pr4_mult = decompose(weekly_pr4, type="multiplicative")
plot(weekly_decomp_pr4_mult)

test_stat <- ur.kpss(weekly_decomp_pr4_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.1415 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 4

The first plot shows the average of products sold for each day of month.

product4[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product4,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 4",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product4[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product4,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 4",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021.

pr4_day=product4[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr4_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 4",y="Average Sold Count by Days of Month", x="Event Date"))

Monthly decompositions of sold count of Product 4 are given below.

Seasonality in trend is removed.

monthly_pr4  = ts(product4$sold_count, freq=30)
monthly_decomp_pr4 = decompose(monthly_pr4)
plot(monthly_decomp_pr4)

test_stat <- ur.kpss(monthly_decomp_pr4$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0299 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr4_mult = decompose(monthly_pr4, type="multiplicative")
plot(monthly_decomp_pr4_mult)

test_stat <- ur.kpss(monthly_decomp_pr4_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0243 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive weekly decomposition is chosen for Product 4 since its test statistics is the smallest and random term is the most similar to a white noise series.

Product 5

ID of product 5 is “48740784” and it is a coat by “Altınyıldız Classics”.

(ggplot(product5,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 5",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 5

The first plot shows the average of products sold for each weekday.

product5[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product5,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 5",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021. Coat sales mostly occur in winter months. Since winter of 2021 is not observed yet mean of the sold count is smaller for 2021.

product5[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product5,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 5",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.

product5[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product5,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 5",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 5 is decomposed weekly and additively. Trend term indicates the winter period where the coat sales are higher and random term has constant variance except an outlier period.

weekly_pr5  = ts(product5$sold_count, freq=7)
weekly_decomp_pr5_add = decompose(weekly_pr5)
plot(weekly_decomp_pr5_add)

test_stat <- ur.kpss(weekly_decomp_pr5_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0124 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

In multiplicative decomposition there is still a seasonality in Trend term and random term has less constant variance than the additive decomposition.

weekly_decomp_pr5_mult = decompose(weekly_pr5, type="multiplicative")
plot(weekly_decomp_pr5_mult)

test_stat <- ur.kpss(weekly_decomp_pr5_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0981 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 5

The first plot shows the average of products sold for each day of month.

product5[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product5,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 5",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product5[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product5,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 5",y="Average Sold Count by Days of Month", x="Event Date"))

Monthly decompositions of sold count of Product 5 are given below. Trend term show the high coat sale period. Effect of seasonality can be observed.

monthly_pr5  = ts(product5$sold_count, freq=30)
monthly_decomp_pr5 = decompose(monthly_pr5)
plot(monthly_decomp_pr5)

test_stat <- ur.kpss(monthly_decomp_pr5$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0195 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr5_mult = decompose(monthly_pr5, type="multiplicative")
plot(monthly_decomp_pr5_mult)

test_stat <- ur.kpss(monthly_decomp_pr5_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.1544 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive weekly decomposition is chosen for Product 5 since its test statistics is the smallest and random term is the most similar to a white noise series.

Product 6

ID of product 6 is “6676673” and it is bluetooth earphones by “Xiaomi”.

(ggplot(product6,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 6",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 6

The first plot shows the average of products sold for each weekday.

product6[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product6,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.

product6[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product6,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.

product6[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product6,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))

Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed for some of the months.

pr6_wday=product6[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr6_wday,aes(x=Day,y=wday_mean))+
    geom_line(aes(group=factor(Month), color=Month))+
    facet_grid(rows=pr6_wday$year)+
    theme_minimal()+
    labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 6 is decomposed weekly and additively. Seasonality can be observed. Random term has constant mean and more or less constant variance.

weekly_pr6  = ts(product6$sold_count, freq=7)
weekly_decomp_pr6_add = decompose(weekly_pr6)
plot(weekly_decomp_pr6_add)

test_stat <- ur.kpss(weekly_decomp_pr6_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0135 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
weekly_decomp_pr6_mult = decompose(weekly_pr6, type="multiplicative")
plot(weekly_decomp_pr6_mult)

test_stat <- ur.kpss(weekly_decomp_pr6_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0289 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 6

The first plot shows the average of products sold for each day of month.

product6[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product6,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 6",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product6[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product6,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 6",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021. A clear pattern can be observed from the plot below.

pr6_day=product6[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr6_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 6",y="Average Sold Count by Days of Month", x="Event Date"))

Monthly decompositions of sold count of Product 6 are given below. Trend term does not include seasonality. Random term satisfies assumptions.

monthly_pr6  = ts(product6$sold_count, freq=30)
monthly_decomp_pr6 = decompose(monthly_pr6)
plot(monthly_decomp_pr6)

test_stat <- ur.kpss(monthly_decomp_pr6$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0333 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr6_mult = decompose(monthly_pr6, type="multiplicative")
plot(monthly_decomp_pr6_mult)

test_stat <- ur.kpss(monthly_decomp_pr6_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0269 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive weekly decomposition is chosen for Product 6 since its test statistics is the smallest and random term is the most similar to a white noise series.

Product 7

ID of product 7 is “7061886” and it is bluetooth earphones by “Xiaomi”.

(ggplot(product7,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 7",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 7

The first plot shows the average of products sold for each weekday.

product7[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product7,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 7",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.

product7[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product7,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 7",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month.

product7[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product7,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 7",y="Average Sold Count by Weekdays", x="Event Date"))

Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed.

pr7_wday=product7[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr7_wday,aes(x=Day,y=wday_mean))+
    geom_line(aes(group=factor(Month), color=Month))+
    facet_grid(rows=pr7_wday$year)+
    theme_minimal()+
    labs(title="Product 7",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 7 is decomposed weekly and additively. Seasonality can be observed. Random term has constant mean and more or less constant variance except in some periods that could be outliers.

weekly_pr7  = ts(product7$sold_count, freq=7)
weekly_decomp_pr7_add = decompose(weekly_pr7)
plot(weekly_decomp_pr7_add)

test_stat <- ur.kpss(weekly_decomp_pr7_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0136 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

For multiplicative series variance of random term is more close to constant.

weekly_decomp_pr7_mult = decompose(weekly_pr7, type="multiplicative")
plot(weekly_decomp_pr7_mult)

test_stat <- ur.kpss(weekly_decomp_pr7_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.1076 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 7

The first plot shows the average of products sold for each day of month.

product7[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product7,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 7",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product7[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product7,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 7",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021.

pr7_day=product7[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr7_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 7",y="Average Sold Count by Days of Month", x="Event Date"))

Monthly decompositions of sold count of Product 7 are given below. Trend term does not include seasonality. Random term satisfies assumptions.

monthly_pr7  = ts(product7$sold_count, freq=30)
monthly_decomp_pr7 = decompose(monthly_pr7)
plot(monthly_decomp_pr7)

test_stat <- ur.kpss(monthly_decomp_pr7$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0196 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr7_mult = decompose(monthly_pr7, type="multiplicative")
plot(monthly_decomp_pr7_mult)

test_stat <- ur.kpss(monthly_decomp_pr7_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.023 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive weekly decomposition is chosen for Product 7 since its test statistics is the smallest and random term is the most similar to a white noise series.

Product 8

ID of product 8 is “73318567” and it is a bikini top by “TRENDYOL MILLA”.

(ggplot(product8,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 8",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 8

The first plot shows the average of products sold for each weekday.

product8[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product8,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 8",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021. There is not any sales record in 2020.

product8[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product8,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 8",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month.

product8[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product8,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 8",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 8 is decomposed weekly and additively. Seasonality can be observed. Trend term specifies the periods where the sold count is non-zero.

weekly_pr8  = ts(product8$sold_count, freq=7)
weekly_decomp_pr8_add = decompose(weekly_pr8)
plot(weekly_decomp_pr8_add)

test_stat <- ur.kpss(weekly_decomp_pr8_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0484 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
weekly_decomp_pr8_mult = decompose(weekly_pr8, type="multiplicative")
plot(weekly_decomp_pr8_mult)

test_stat <- ur.kpss(weekly_decomp_pr8_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0706 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 8

The first plot shows the average of products sold for each day of month.

product8[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product8,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 8",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product8[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product8,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 8",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021.

pr8_day=product8[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr8_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 8",y="Average Sold Count by Days of Month", x="Event Date"))

Monthly decompositions of sold count of Product 8 are given below.

monthly_pr8  = ts(product8$sold_count, freq=30)
monthly_decomp_pr8 = decompose(monthly_pr8)
plot(monthly_decomp_pr8)

test_stat <- ur.kpss(monthly_decomp_pr8$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0313 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr8_mult = decompose(monthly_pr8, type="multiplicative")
plot(monthly_decomp_pr8_mult)

test_stat <- ur.kpss(monthly_decomp_pr8_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.1496 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive monthly decomposition is chosen for Product 8 since its test statistics is the smallest and random term is the most similar to a white noise series.

Product 9

ID of product 9 is “85004” and it is face cleaner by “La Roche Posay”.

(ggplot(product9,aes(x=event_date,y=sold_count))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 9",y="Sold Count", x="Event Date"))

Weekly Decomposition of Product 9

The first plot shows the average of products sold for each weekday.

product9[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product9,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 9",y="Average Sold Count by Weekdays", x="Event Date"))

Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.

product9[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product9,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 9",y="Average Sold Count by Weekdays", x="Event Date"))

Third plot shows the average sold counts for each weekday for each month. Effect of each day of week is similar for each month.

product9[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product9,aes(x=event_date,y=wday_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 9",y="Average Sold Count by Weekdays", x="Event Date"))

Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed.

pr9_wday=product9[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr9_wday,aes(x=Day,y=wday_mean))+
    geom_line(aes(group=factor(Month), color=Month))+
    facet_grid(rows=pr9_wday$year)+
    theme_minimal()+
    labs(title="Product 9",y="Average Sold Count by Weekdays", x="Event Date"))

Time series of Prouduct 9 is decomposed weekly and additively. Seasonality can be observed. Trend term has somewhat a seasonality. Random term has constant mean and more or less constant variance except in some periods that could be outliers.

weekly_pr9  = ts(product9$sold_count, freq=7)
weekly_decomp_pr9_add = decompose(weekly_pr9)
plot(weekly_decomp_pr9_add)

test_stat <- ur.kpss(weekly_decomp_pr9_add$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0124 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

For multiplicative series variance of random term is more close to constant.

weekly_decomp_pr9_mult = decompose(weekly_pr9, type="multiplicative")
plot(weekly_decomp_pr9_mult)

test_stat <- ur.kpss(weekly_decomp_pr9_mult$random, use.lag = "8")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.1574 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Monthly Decomposition of Product 9

The first plot shows the average of products sold for each day of month.

product9[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product9,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 9",y="Average Sold Count by Days of Month", x="Event Date"))

Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.

product9[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product9,aes(x=event_date,y=day_mean))+
    geom_line()+
    theme_minimal()+
    labs(title="Product 9",y="Average Sold Count by Days of Month", x="Event Date"))

Finally, when sold counts of each day of month is plotted for each 2020 and 2021. A pattern could be observed.

pr9_day=product9[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr9_day,aes(x=day,y=day_mean))+
    geom_line(aes(group=factor(year), color=factor(year)))+
    theme_minimal()+
    labs(title="Product 9",y="Average Sold Count by Days of Month", x="Event Date"))

Monthly decomposition of sold count of Product 9 are given below. Trend term does not include seasonality.

monthly_pr9  = ts(product9$sold_count, freq=30)
monthly_decomp_pr9 = decompose(monthly_pr9)
plot(monthly_decomp_pr9)

test_stat <- ur.kpss(monthly_decomp_pr9$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.0225 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
monthly_decomp_pr9_mult = decompose(monthly_pr9, type="multiplicative")
plot(monthly_decomp_pr9_mult)

test_stat <- ur.kpss(monthly_decomp_pr9_mult$random, use.lag = "12")
summary(test_stat)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 12 lags. 
## 
## Value of test-statistic is: 0.024 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Additive weekly decomposition is chosen for Product 9 since its test statistics is the smallest and random term is the most similar to a white noise series.

ARIMA MODELS

Product 1

Product 1 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr1  = ts(product1$sold_count, freq=7)
weekly_decomp_pr1_add = decompose(weekly_pr1)
plot(weekly_decomp_pr1_add$random)

ACF plot of the random term shows sinusodial behavior and PACF plot has some spikes until lag 3 with some what an exponential decay behavior. ARIMA(p,0,q) models will be used.

acf(weekly_decomp_pr1_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr1_add$random, na.action = na.pass, lag.max = 50)

ARIMA(p,0,0) models with different p values will be tried. After p=8 AIC values starts to increase.

AIC(arima(weekly_decomp_pr1_add$random, order = c(1,0,0)))
## [1] 5668.436
AIC(arima(weekly_decomp_pr1_add$random, order = c(2,0,0)))
## [1] 5654.891
AIC(arima(weekly_decomp_pr1_add$random, order = c(3,0,0)))
## [1] 5625.933
AIC(arima(weekly_decomp_pr1_add$random, order = c(4,0,0)))
## [1] 5602.433
AIC(arima(weekly_decomp_pr1_add$random, order = c(5,0,0)))
## [1] 5599.56
AIC(arima(weekly_decomp_pr1_add$random, order = c(6,0,0)))
## [1] 5596.376
AIC(arima(weekly_decomp_pr1_add$random, order = c(7,0,0)))
## [1] 5589.928
AIC(arima(weekly_decomp_pr1_add$random, order = c(8,0,0)))
## [1] 5584.987
AIC(arima(weekly_decomp_pr1_add$random, order = c(9,0,0)))
## [1] 5586.433

ARIMA (0,0,q) models with different q values will be tried. After q=9 AIC values starts to increase

AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,1)))
## [1] 5662.934
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,2)))
## [1] 5662.562
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,3)))
## [1] 5588.176
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,4)))
## [1] 5555.225
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,5)))
## [1] 5552.875
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,6)))
## [1] 5548.857
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,7)))
## [1] 5505.549
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,8)))
## [1] 5498.04
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,9)))
## [1] 5498.274
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,10)))
## [1] 5499.914
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,11)))
## [1] 5501.853

Combining two models as ARIMA(8,0,9)

model1 = arima(weekly_decomp_pr1_add$random, order = c(8,0,9))
AIC(model1)
## [1] 5503.671

Fitting the model

pr1_fitted = weekly_decomp_pr1_add$random - residuals(model1)
pr1_fitted_transformed = pr1_fitted+weekly_decomp_pr1_add$seasonal+weekly_decomp_pr1_add$trend
pr1_predictions=cbind(sold_count=weekly_pr1,fitted=pr1_fitted_transformed)
pr1_predictions=as.data.table(pr1_predictions)
pr1_predictions$date=product1$event_date

ggplot(pr1_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_1 = accu(pr1_predictions$sold_count, pr1_predictions$fitted)
print(accu_1)
##     n    mean       FBias        MAPE      RMSE      MAD        MADP
## 1 372 858.207 0.001146426 0.002160051 0.9919027 0.983871 0.001146426
##         WMAPE
## 1 0.001146426

Product 2

Product 2 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr2= ts(product2$sold_count, freq=7)
weekly_decomp_pr2_add = decompose(weekly_pr2)
plot(weekly_decomp_pr2_add$random)

ACF plot of the random term shows sinusodial behavior and PACF plot has some spikes until lag 4 with some what an exponential decay behavior. Behaviour is similar to the product 1. ARIMA (p,0,q) models will be used.

acf(weekly_decomp_pr2_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr2_add$random, na.action = na.pass, lag.max = 50)

ARIMA(p,0,0) models with different p values will be tried. After p=8 AIC values starts to increase.

AIC(arima(weekly_decomp_pr2_add$random, order = c(1,0,0)))
## [1] 2303.525
AIC(arima(weekly_decomp_pr2_add$random, order = c(2,0,0)))
## [1] 2255.9
AIC(arima(weekly_decomp_pr2_add$random, order = c(3,0,0)))
## [1] 2229.862
AIC(arima(weekly_decomp_pr2_add$random, order = c(4,0,0)))
## [1] 2231.27
AIC(arima(weekly_decomp_pr2_add$random, order = c(5,0,0)))
## [1] 2195.715
AIC(arima(weekly_decomp_pr2_add$random, order = c(6,0,0)))
## [1] 2142.98
AIC(arima(weekly_decomp_pr2_add$random, order = c(7,0,0)))
## [1] 2137.768
AIC(arima(weekly_decomp_pr2_add$random, order = c(8,0,0)))
## [1] 2120.578
AIC(arima(weekly_decomp_pr2_add$random, order = c(9,0,0)))
## [1] 2122.574

ARIMA (0,0,q) models with different q values will be tried. After q=7 AIC values starts to increase

AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,1)))
## [1] 2291.776
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,2)))
## [1] 2210.957
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,3)))
## [1] 2123.115
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,4)))
## [1] 2118.695
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,5)))
## [1] 2107.705
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,6)))
## [1] 2101.97
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,7)))
## [1] 2093.406
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,8)))
## [1] 2094.712

Combining two models as ARIMA(8,0,7)

model2 = arima(weekly_decomp_pr2_add$random, order = c(8,0,7))
AIC(model2)
## [1] 2071.746

Fitting the model

pr2_fitted = weekly_decomp_pr2_add$random - residuals(model2)
pr2_fitted_transformed = pr2_fitted+weekly_decomp_pr2_add$seasonal+weekly_decomp_pr2_add$trend
pr2_predictions=cbind(sold_count=weekly_pr2,fitted=pr2_fitted_transformed)
pr2_predictions=as.data.table(pr2_predictions)
pr2_predictions$date=product2$event_date

ggplot(pr2_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_2 = accu(pr2_predictions$sold_count, pr2_predictions$fitted)
print(accu_2)
##     n     mean      FBias MAPE      RMSE      MAD       MADP      WMAPE
## 1 372 11.01882 0.08929007  Inf 0.9919027 0.983871 0.08929007 0.08929007

Product 3

Product 3 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr3= ts(product3$sold_count, freq=7)
weekly_decomp_pr3_add = decompose(weekly_pr3)
plot(weekly_decomp_pr3_add$random)

ACF plot of the random term shows sinusodial behavior as well as the PACF plot. ARIMA(p,0,q) models will be used.

acf(weekly_decomp_pr3_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr3_add$random, na.action = na.pass, lag.max = 50)

ARIMA(p,0,0) models with different p values will be tried. After p=10 AIC values starts to increase.

AIC(arima(weekly_decomp_pr3_add$random, order = c(1,0,0)))
## [1] 3621.813
AIC(arima(weekly_decomp_pr3_add$random, order = c(2,0,0)))
## [1] 3579.341
AIC(arima(weekly_decomp_pr3_add$random, order = c(3,0,0)))
## [1] 3563.754
AIC(arima(weekly_decomp_pr3_add$random, order = c(4,0,0)))
## [1] 3560.052
AIC(arima(weekly_decomp_pr3_add$random, order = c(5,0,0)))
## [1] 3539.706
AIC(arima(weekly_decomp_pr3_add$random, order = c(6,0,0)))
## [1] 3531.944
AIC(arima(weekly_decomp_pr3_add$random, order = c(7,0,0)))
## [1] 3530.479
AIC(arima(weekly_decomp_pr3_add$random, order = c(8,0,0)))
## [1] 3522.776
AIC(arima(weekly_decomp_pr3_add$random, order = c(9,0,0)))
## [1] 3519.717
AIC(arima(weekly_decomp_pr3_add$random, order = c(10,0,0)))
## [1] 3521.508
AIC(arima(weekly_decomp_pr3_add$random, order = c(11,0,0)))
## [1] 3523.342

ARIMA (0,0,q) models with different q values will be tried. q=3 will be used since the decrease in AIC values is very small after 3.

AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,1)))
## [1] 3607.011
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,2)))
## [1] 3558.909
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,3)))
## [1] 3489.457
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,4)))
## [1] 3489.448
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,5)))
## [1] 3488.446

Combining two models as ARIMA(10,0,3)

model3 = arima(weekly_decomp_pr3_add$random, order = c(10,0,3))
## Warning in arima(weekly_decomp_pr3_add$random, order = c(10, 0, 3)): possible
## convergence problem: optim gave code = 1
AIC(model3)
## [1] 3485.596

Fitting the model

pr3_fitted = weekly_decomp_pr3_add$random - residuals(model3)
pr3_fitted_transformed = pr3_fitted+weekly_decomp_pr3_add$seasonal+weekly_decomp_pr3_add$trend
pr3_predictions=cbind(sold_count=weekly_pr3,fitted=pr3_fitted_transformed)
pr3_predictions=as.data.table(pr3_predictions)
pr3_predictions$date=product3$event_date

ggplot(pr3_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_3 = accu(pr3_predictions$sold_count, pr3_predictions$fitted)
print(accu_3)
##     n     mean      FBias MAPE      RMSE      MAD       MADP      WMAPE
## 1 372 92.20968 0.01066993  Inf 0.9919027 0.983871 0.01066993 0.01066993

Product 4

Product 4 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr4= ts(product4$sold_count, freq=7)
weekly_decomp_pr4_add = decompose(weekly_pr4)
plot(weekly_decomp_pr4_add$random)

ACF plot of the random term shows sinusodial behavior as well and PACF plot has spikes. There is a significant spike in lag 2 and 4. Therefore ARIMA(2,0,0) and ARIMA (4,0,0) will be tried.

acf(weekly_decomp_pr4_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr4_add$random, na.action = na.pass, lag.max = 50)

ARIMA(2,0,0)

AIC(arima(weekly_decomp_pr4_add$random, order = c(2,0,0)))
## [1] 5056.963

ARIMA(4,0,0)

AIC(arima(weekly_decomp_pr4_add$random, order = c(4,0,0)))
## [1] 5025.255

ARIMA(4,0,0) model will be used since it has lower AIC value.

ARIMA(0,0,q) models shows that when q=0 AIC value is smaller.

AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,1)))
## [1] 5072.758
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,2)))
## [1] 5074.541
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,3)))
## [1] 4954.022
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,4)))
## [1] 4952.369
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,5)))
## [1] 4953.854

Combining two models as ARIMA(4,0,4)

model4 = arima(weekly_decomp_pr4_add$random, order = c(4,0,4))
AIC(model4)
## [1] 4910.234

Fitting the model

pr4_fitted = weekly_decomp_pr4_add$random - residuals(model4)
pr4_fitted_transformed = pr4_fitted+weekly_decomp_pr4_add$seasonal+weekly_decomp_pr4_add$trend
pr4_predictions=cbind(sold_count=weekly_pr4,fitted=pr4_fitted_transformed)
pr4_predictions=as.data.table(pr4_predictions)
pr4_predictions$date=product4$event_date

ggplot(pr4_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_4 = accu(pr4_predictions$sold_count, pr4_predictions$fitted)
print(accu_4)
##     n     mean       FBias        MAPE      RMSE      MAD        MADP
## 1 372 385.1452 0.002554546 0.004564582 0.9919027 0.983871 0.002554546
##         WMAPE
## 1 0.002554546

Product 5

Product 5 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr5= ts(product5$sold_count, freq=7)
weekly_decomp_pr5_add = decompose(weekly_pr5)
plot(weekly_decomp_pr5_add$random)

ACF plot of the random term has a spike at a lag smaller than 1. sinusodial. PACF plot shows exponential decay at some point and sinusodial behaviour after.

acf(weekly_decomp_pr5_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr5_add$random, na.action = na.pass, lag.max = 50)

ARIMA(0,0,q) models with q values (1,2,3,4,5) will be tried. Smallest AIC value belongs to ARIMA(0,0,5)

AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,1)))
## [1] 1779.282
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,2)))
## [1] 1623.483
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,3)))
## [1] 1622.628
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,4)))
## [1] 1608.534
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,5)))
## [1] 1548.643

ARIMA(p,0,0) will be tried to see if there is a model with smaller AIC. models shows that when q=0 AIC value is smaller.

AIC(arima(weekly_decomp_pr5_add$random, order = c(1,0,5)))
## [1] 1550.419
AIC(arima(weekly_decomp_pr5_add$random, order = c(3,0,5)))
## [1] 1552.771
AIC(arima(weekly_decomp_pr5_add$random, order = c(4,0,5)))
## [1] 1552.582
AIC(arima(weekly_decomp_pr5_add$random, order = c(5,0,5)))
## [1] 1553.968

All AIC values are bigger therefore ARIMA(0,0,5) will be used.

model5 = arima(weekly_decomp_pr5_add$random, order = c(0,0,5))
AIC(model5)
## [1] 1548.643

Fitting the model

pr5_fitted = weekly_decomp_pr5_add$random - residuals(model5)
pr5_fitted_transformed = pr5_fitted+weekly_decomp_pr5_add$seasonal+weekly_decomp_pr5_add$trend
pr5_predictions=cbind(sold_count=weekly_pr5,fitted=pr5_fitted_transformed)
pr5_predictions=as.data.table(pr5_predictions)
pr5_predictions$date=product5$event_date

ggplot(pr5_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_5 = accu(pr5_predictions$sold_count, pr5_predictions$fitted)
print(accu_5)
##     n      mean    FBias MAPE      RMSE      MAD     MADP    WMAPE
## 1 372 0.8306452 1.184466  NaN 0.9919027 0.983871 1.184466 1.184466

Product 6

Product 6 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr6= ts(product6$sold_count, freq=7)
weekly_decomp_pr6_add = decompose(weekly_pr6)
plot(weekly_decomp_pr6_add$random)

ACF plot of the random term has a spike at a lag smaller than 1. sinusodial. PACF plot shows exponential decay.

acf(weekly_decomp_pr6_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr6_add$random, na.action = na.pass, lag.max = 50)

ARIMA(0,0,q) models with q values (1,2,3,4,5) will be tried. ARIMA(0,0,3) model will be used since the decrease in AIC values gets smaller after that point.

AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,1)))
## [1] 4552.394
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,2)))
## [1] 4500.585
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,3)))
## [1] 4422.014
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,4)))
## [1] 4422.395
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,5)))
## [1] 4415.996

ARIMA(p,0,0) will be tried to see if there is a model with smaller AIC. ARIMA(5,0,3) model has the smallest AIC value.

AIC(arima(weekly_decomp_pr6_add$random, order = c(1,0,3)))
## [1] 4423.024
AIC(arima(weekly_decomp_pr6_add$random, order = c(2,0,3)))
## [1] 4418.547
AIC(arima(weekly_decomp_pr6_add$random, order = c(4,0,3)))
## [1] 4411.558
AIC(arima(weekly_decomp_pr6_add$random, order = c(5,0,3)))
## [1] 4408.108

ARIMA(5,0,3) will be used.

model6 = arima(weekly_decomp_pr6_add$random, order = c(5,0,3))
AIC(model6)
## [1] 4408.108

Fitting the model

pr6_fitted = weekly_decomp_pr6_add$random - residuals(model6)
pr6_fitted_transformed = pr6_fitted+weekly_decomp_pr6_add$seasonal+weekly_decomp_pr6_add$trend
pr6_predictions=cbind(sold_count=weekly_pr6,fitted=pr6_fitted_transformed)
pr6_predictions=as.data.table(pr6_predictions)
pr6_predictions$date=product6$event_date

ggplot(pr6_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_6 = accu(pr6_predictions$sold_count, pr6_predictions$fitted)
print(accu_6)
##     n     mean       FBias        MAPE      RMSE      MAD        MADP
## 1 372 392.0323 0.002509668 0.003424822 0.9919027 0.983871 0.002509668
##         WMAPE
## 1 0.002509668

Product 7

Product 7 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr7= ts(product7$sold_count, freq=7)
weekly_decomp_pr7_add = decompose(weekly_pr7)
plot(weekly_decomp_pr7_add$random)

ACF plot of the random term has a spike at around lag 2.5 .PACF plot shows exponential decay.

acf(weekly_decomp_pr7_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr7_add$random, na.action = na.pass, lag.max = 50)

ARIMA(0,0,q) models with q values (1,2,3,4,5) will be tried. ARIMA(0,0,4) model will be used because AIC values starts to increase.

AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,1)))
## [1] 3225.885
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,2)))
## [1] 3220.683
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,3)))
## [1] 3132.23
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,4)))
## [1] 3109.986
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,5)))
## [1] 3111.038

ARIMA(p,0,0) will be tried to see if there is a model with smaller AIC. ARIMA(2,0,4) model has the smallest AIC value.

AIC(arima(weekly_decomp_pr7_add$random, order = c(1,0,4)))
## [1] 3111.557
AIC(arima(weekly_decomp_pr7_add$random, order = c(2,0,4)))
## [1] 3064.732
AIC(arima(weekly_decomp_pr7_add$random, order = c(3,0,4)))
## [1] 3082.122
AIC(arima(weekly_decomp_pr7_add$random, order = c(4,0,4)))
## [1] 3073.936

ARIMA(2,0,4) will be used.

model7 = arima(weekly_decomp_pr7_add$random, order = c(2,0,4))
AIC(model7)
## [1] 3064.732

Fitting the model

pr7_fitted = weekly_decomp_pr7_add$random - residuals(model7)
pr7_fitted_transformed = pr7_fitted+weekly_decomp_pr7_add$seasonal+weekly_decomp_pr7_add$trend
pr7_predictions=cbind(sold_count=weekly_pr7,fitted=pr7_fitted_transformed)
pr7_predictions=as.data.table(pr7_predictions)
pr7_predictions$date=product7$event_date

ggplot(pr7_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_7 = accu(pr7_predictions$sold_count, pr7_predictions$fitted)
print(accu_7)
##     n     mean      FBias       MAPE      RMSE      MAD       MADP      WMAPE
## 1 372 39.72581 0.02476654 0.04023471 0.9919027 0.983871 0.02476654 0.02476654

Product 8

Product 8 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr8= ts(product8$sold_count, freq=7)
weekly_decomp_pr8_add = decompose(weekly_pr8)
plot(weekly_decomp_pr8_add$random)

ACF plot of the random shows sinusodial behavior.PACF plot has spikes around lag 1 and 1.5 and none beyond.

acf(weekly_decomp_pr8_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr8_add$random, na.action = na.pass, lag.max = 50)

ARIMA(p,0,0) models with different p values will be tried. ARIMA(9,0,0) model will be used because AIC values starts to increase.

AIC(arima(weekly_decomp_pr8_add$random, order = c(1,0,0)))
## [1] 2657.071
AIC(arima(weekly_decomp_pr8_add$random, order = c(2,0,0)))
## [1] 2617.024
AIC(arima(weekly_decomp_pr8_add$random, order = c(3,0,0)))
## [1] 2613.855
AIC(arima(weekly_decomp_pr8_add$random, order = c(4,0,0)))
## [1] 2607.959
AIC(arima(weekly_decomp_pr8_add$random, order = c(5,0,0)))
## [1] 2564.091
AIC(arima(weekly_decomp_pr8_add$random, order = c(6,0,0)))
## [1] 2557.697
AIC(arima(weekly_decomp_pr8_add$random, order = c(7,0,0)))
## [1] 2549.243
AIC(arima(weekly_decomp_pr8_add$random, order = c(8,0,0)))
## [1] 2546.981
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,0)))
## [1] 2530.835
AIC(arima(weekly_decomp_pr8_add$random, order = c(10,0,0)))
## [1] 2532.442

ARIMA(0,0,q) will be tried to see if there is a model with smaller AIC. ARIMA(9,0,3) model has the smallest AIC value.

AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,1)))
## [1] 2531.421
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,2)))
## [1] 2509.796
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,3)))
## [1] 2504.486
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,4)))
## [1] 2506.455

ARIMA(9,0,3) will be used.

model8 = arima(weekly_decomp_pr8_add$random, order = c(9,0,3))
AIC(model8)
## [1] 2504.486

Fitting the model

pr8_fitted = weekly_decomp_pr8_add$random - residuals(model8)
pr8_fitted_transformed = pr8_fitted+weekly_decomp_pr8_add$seasonal+weekly_decomp_pr8_add$trend
pr8_predictions=cbind(sold_count=weekly_pr8,fitted=pr8_fitted_transformed)
pr8_predictions=as.data.table(pr8_predictions)
pr8_predictions$date=product8$event_date

ggplot(pr8_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_8 = accu(pr8_predictions$sold_count, pr8_predictions$fitted)
print(accu_8)
##     n     mean      FBias MAPE      RMSE      MAD       MADP      WMAPE
## 1 372 15.79301 0.06229787  NaN 0.9919027 0.983871 0.06229787 0.06229787

Product 9

Product 9 is decomposed additively on a weekly level. Random term is plotted below.

weekly_pr9= ts(product9$sold_count, freq=7)
weekly_decomp_pr9_add = decompose(weekly_pr9)
plot(weekly_decomp_pr9_add$random)

ACF plot of the random shows sinusodial behavior.PACF plot has spikes until lag 2.

acf(weekly_decomp_pr9_add$random, na.action = na.pass, lag.max = 50)

pacf(weekly_decomp_pr9_add$random, na.action = na.pass, lag.max = 50)

ARIMA(p,0,0) models with different p values will be tried. ARIMA(8,0,0) model will be used because AIC values starts to increase.

AIC(arima(weekly_decomp_pr9_add$random, order = c(1,0,0)))
## [1] 3579.892
AIC(arima(weekly_decomp_pr9_add$random, order = c(2,0,0)))
## [1] 3570.061
AIC(arima(weekly_decomp_pr9_add$random, order = c(3,0,0)))
## [1] 3532.722
AIC(arima(weekly_decomp_pr9_add$random, order = c(4,0,0)))
## [1] 3530.453
AIC(arima(weekly_decomp_pr9_add$random, order = c(5,0,0)))
## [1] 3526.35
AIC(arima(weekly_decomp_pr9_add$random, order = c(6,0,0)))
## [1] 3521.961
AIC(arima(weekly_decomp_pr9_add$random, order = c(7,0,0)))
## [1] 3507.079
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,0)))
## [1] 3503.417
AIC(arima(weekly_decomp_pr9_add$random, order = c(9,0,0)))
## [1] 3504.501
AIC(arima(weekly_decomp_pr9_add$random, order = c(10,0,0)))
## [1] 3506.491

ARIMA(0,0,q) will be tried to see if there is a model with smaller AIC. ARIMA(8,0,5) model has the smallest AIC value.

AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,1)))
## [1] 3443.978
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,2)))
## [1] 3445.951
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,4)))
## [1] 3443.681
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,5)))
## [1] 3429.3
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,6)))
## [1] 3444.711

ARIMA(8,0,5) will be used.

model9 = arima(weekly_decomp_pr9_add$random, order = c(8,0,5))
AIC(model9)
## [1] 3429.3

Fitting the model

pr9_fitted = weekly_decomp_pr9_add$random - residuals(model9)
pr9_fitted_transformed = pr9_fitted+weekly_decomp_pr9_add$seasonal+weekly_decomp_pr9_add$trend
pr9_predictions=cbind(sold_count=weekly_pr9,fitted=pr9_fitted_transformed)
pr9_predictions=as.data.table(pr9_predictions)
pr9_predictions$date=product9$event_date

ggplot(pr9_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).

accu_9 = accu(pr9_predictions$sold_count, pr9_predictions$fitted)
print(accu_9)
##     n     mean      FBias       MAPE      RMSE      MAD       MADP      WMAPE
## 1 372 74.19624 0.01326039 0.02099574 0.9919027 0.983871 0.01326039 0.01326039

REGRESSOR ANALYSIS

There are 11 regressors in the data sets for each product.

Price

Price column denotes the price of the product on the given day. Changes in price could be useful for forecasting sales counts. However there are missing observations in this column. Let’s check correlation of sales count and price for each product. Several of the correlation functions have returned NA’s due to missing data. Price can not be used as regressor for such products. For Product 4 and Product 6 price has correlation with sold count even though it is not very significant. This could be observed from both the plots and correlation tests.

(ggplot(data=product1, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 1",y="Sold Count", x="Price"))

cor(product1$price, product1$sold_count)
## [1] -0.2583435
(ggplot(data=product2, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 2",y="Sold Count", x="Price"))

cor(product2$price, product2$sold_count)
## [1] NA
(ggplot(data=product3, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 3",y="Sold Count", x="Price"))

cor(product3$price, product3$sold_count)
## [1] NA
(ggplot(data=product4, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 4",y="Sold Count", x="Price"))

cor(product4$price, product4$sold_count)
## [1] -0.5721201
(ggplot(data=product5, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 5",y="Sold Count", x="Price"))

cor(product5$price, product5$sold_count)
## [1] NA
(ggplot(data=product6, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 6",y="Sold Count", x="Price"))

cor(product6$price, product6$sold_count)
## [1] -0.5127488
(ggplot(data=product7, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 7",y="Sold Count", x="Price"))

cor(product7$price, product7$sold_count)
## [1] -0.32637
(ggplot(data=product8, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 8",y="Sold Count", x="Price"))

cor(product8$price, product8$sold_count)
## [1] NA
(ggplot(data=product9, aes(x=price, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 9",y="Sold Count", x="Price"))

cor(product9$price, product9$sold_count)
## [1] -0.2299915

Visit Count

Visit counts is 0 for most of the products until about February 2021. By visiual inspection there is not any correlation observed excep Product 8 which also has 0 sold count until abour February 2021. Visit count can be used as a regressor for Product 8.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=visit_count))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
    labs(title="Visit Counts")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

Correlation between visit count and sold count for product 8 shows that two variables are highly correlated.

(ggplot(data=product8, aes(x=visit_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 8",y="Sold Count", x="Visit Count"))

cor(product8$visit_count, product8$sold_count)
## [1] 0.8969791

Basket Count

By visual inspection for almost all products basket count and sold count seems to be correlated.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=basket_count))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
    labs(title="Basket Counts")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

To check plots and correlation coefficients

(ggplot(data=product1, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 1",y="Sold Count", x="Basket Count"))

cor(product1$basket_count, product1$sold_count)
## [1] 0.8372097
(ggplot(data=product2, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 2",y="Sold Count", x="Basket Count"))

cor(product2$basket_count, product2$sold_count)
## [1] 0.95016
(ggplot(data=product3, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 3",y="Sold Count", x="Basket Count"))

cor(product3$basket_count, product3$sold_count)
## [1] 0.9515283
(ggplot(data=product4, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 4",y="Sold Count", x="Basket Count"))

cor(product4$basket_count, product4$sold_count)
## [1] 0.8872024
(ggplot(data=product5, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 5",y="Sold Count", x="Basket Count"))

cor(product5$basket_count, product5$sold_count)
## [1] 0.9011925
(ggplot(data=product6, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 6",y="Sold Count", x="Basket Count"))

cor(product6$basket_count, product6$sold_count)
## [1] 0.8656776
(ggplot(data=product7, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 7",y="Sold Count", x="Basket Count"))

cor(product7$basket_count, product7$sold_count)
## [1] 0.8668665
(ggplot(data=product8, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 8",y="Sold Count", x="Basket Count"))

cor(product8$basket_count, product8$sold_count)
## [1] 0.980521
(ggplot(data=product9, aes(x=basket_count, y=sold_count))+geom_point()+
    theme_minimal()+
    labs(title="Product 9",y="Sold Count", x="Basket Count"))

cor(product9$basket_count, product9$sold_count)
## [1] 0.8194234

As observed in previous plots basket count could be used as a regressor for all products.

Favored Count

Similar to visit count favored count is 0 for all products until the beggining of 2021. By visual inspection there is not much correlation between sold count and favored count.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=favored_count))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
    labs(title="Favored Count")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

Correlation coefficients are computed as

cor(product1$favored_count, product1$sold_count)
## [1] 0.1687429
cor(product2$favored_count, product2$sold_count)
## [1] 0.7257766
cor(product3$favored_count, product3$sold_count)
## [1] 0.7896215
cor(product4$favored_count, product4$sold_count)
## [1] 0.2832415
cor(product5$favored_count, product5$sold_count)
## [1] 0.03265166
cor(product6$favored_count, product6$sold_count)
## [1] 0.2280804
cor(product7$favored_count, product7$sold_count)
## [1] -0.1450431
cor(product8$favored_count, product8$sold_count)
## [1] 0.776343
cor(product9$favored_count, product9$sold_count)
## [1] 0.4509207

Only for products 2,3 and 8 favored correlation coefficient is high. This may be due to the fact that sold counts for those products are mainly high starting from winter of 20210. This variable will not be used as a regressor for any product.

Category Sold

By visual inspection correlation between category sold and sold count is possible.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_sold))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
    labs(title="Category Sold")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

The numeric results are

cor(product1$category_sold, product1$sold_count)
## [1] 0.9000466
cor(product2$category_sold, product2$sold_count)
## [1] 0.7642985
cor(product3$category_sold, product3$sold_count)
## [1] 0.3998297
cor(product4$category_sold, product4$sold_count)
## [1] 0.9157251
cor(product5$category_sold, product5$sold_count)
## [1] 0.1772956
cor(product6$category_sold, product6$sold_count)
## [1] 0.5323229
cor(product7$category_sold, product7$sold_count)
## [1] 0.7591626
cor(product8$category_sold, product8$sold_count)
## [1] 0.763763
cor(product9$category_sold, product9$sold_count)
## [1] 0.6230038

For products 1, 2, 4, 7, and 8 category sold could be used as a regressor.

Category Visits

Since category visit counts are very large it is not possible to observe visually.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_visits))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
    labs(title="Category Visits")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

Numeric correlation coefficients are checked and there is not any significant corelattion.

cor(product1$category_visits, product1$sold_count)
## [1] 0.05368907
cor(product2$category_visits, product2$sold_count)
## [1] 0.2285581
cor(product3$category_visits, product3$sold_count)
## [1] 0.1229495
cor(product4$category_visits, product4$sold_count)
## [1] 0.4285587
cor(product5$category_visits, product5$sold_count)
## [1] 0.09676247
cor(product6$category_visits, product6$sold_count)
## [1] 0.01179411
cor(product7$category_visits, product7$sold_count)
## [1] 0.004137553
cor(product8$category_visits, product8$sold_count)
## [1] 0.4286554
cor(product9$category_visits, product9$sold_count)
## [1] 0.1228725

Category Basket

Category basket values are 0 for all products until February of 2021.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_basket))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
    labs(title="Category Basket")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

Numeric correlation coefficients are checked and there is not any significant corelattion.

cor(product1$category_basket, product1$sold_count)
## [1] -0.08685947
cor(product2$category_basket, product2$sold_count)
## [1] 0.7458757
cor(product3$category_basket, product3$sold_count)
## [1] 0.5771166
cor(product4$category_basket, product4$sold_count)
## [1] 0.1962214
cor(product5$category_basket, product5$sold_count)
## [1] -0.09776011
cor(product6$category_basket, product6$sold_count)
## [1] 0.06057578
cor(product7$category_basket, product7$sold_count)
## [1] -0.1224585
cor(product8$category_basket, product8$sold_count)
## [1] 0.7743248
cor(product9$category_basket, product9$sold_count)
## [1] 0.2880863

It seems like there is some correlation between sold count and category basket for some products. This may be due to the fact that sold counts of those products are high in the winter of 2021 and low for other periods which corresponds with the category basket. This variable will not be used as a regressor.

Category Brand Sold

Category brand sold values are also 0 for all products until 2021. This is the same issue with category basket and favored count this variable is will not be used as a regressor for any products.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_brand_sold))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
    labs(title="Category Brand Sold")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

TY VISITs

TY visits are 0 for all products until February of 2021. This variable also will not be used as a regressor.

ggplot(data=my_data)+geom_line(aes(x=event_date, y=ty_visits))+theme_minimal()+
    labs(title="TY VISITS")

ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
    labs(title="Sold Counts")

ARIMA Models with External Regressors

Possible regressors defined in the previous section are:

Product 1 : basket_count and category_sold

Product 2 : basket_count and category_sold

Product 3 : basket_count

Product 4 : basket_count and category_sold

Product 5 : basket_count

Product 6 : price and basket_count

Product 7 : basket_count and category_sold

Product 8 : visit_count, basket_count and category_sold

Product 9 : basket_count

Above variables will be used to model residuals of ARIMA models proposed in previous section.

Product 1

pr1_predictions[, residuals:=sold_count-fitted]
product1$residuals=pr1_predictions$residuals
product1$arima=pr1_predictions$fitted

pr1_lm = lm(residuals~basket_count + category_sold, data=product1[event_date<"2021-05-22"])
pr1_lm
## 
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product1[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##   (Intercept)   basket_count  category_sold  
##     -53.52813       -0.06122        0.16851
product1 = cbind(product1, res_pred=predict(pr1_lm, new_data=product1))
product1[, fitted:= res_pred+arima]

ggplot(data=product1[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product1[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product1[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 2

pr2_predictions[, residuals:=sold_count-fitted]
product2$residuals=pr2_predictions$residuals
product2$arima=pr2_predictions$fitted

pr2_lm = lm(residuals~basket_count + category_sold, data=product2[event_date<"2021-05-22"])
pr2_lm
## 
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product2[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##   (Intercept)   basket_count  category_sold  
##    -0.4466786      0.0030806      0.0001691
product2 = cbind(product2, res_pred=predict(pr2_lm, new_data=product2))
product2[, fitted:= res_pred+arima]

ggplot(data=product2[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product2[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product2[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 3

pr3_predictions[, residuals:=sold_count-fitted]
product3$residuals=pr3_predictions$residuals
product3$arima=pr3_predictions$fitted

pr3_lm = lm(residuals~basket_count, data=product3[event_date<"2021-05-22"])
pr3_lm
## 
## Call:
## lm(formula = residuals ~ basket_count, data = product3[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##  (Intercept)  basket_count  
##     -6.19110       0.01423
product3 = cbind(product3, res_pred=predict(pr3_lm, new_data=product3))
product3[, fitted:= res_pred+arima]

ggplot(data=product3[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product3[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product3[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 4

pr4_predictions[, residuals:=sold_count-fitted]
product4$residuals=pr4_predictions$residuals
product4$arima=pr4_predictions$fitted

pr4_lm = lm(residuals~basket_count + category_sold, data=product4[event_date<"2021-05-22"])
pr4_lm
## 
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product4[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##   (Intercept)   basket_count  category_sold  
##     -50.64080       -0.01864        0.04332
product4 = cbind(product4, res_pred=predict(pr4_lm, new_data=product4))
product4[, fitted:= res_pred+arima]

ggplot(data=product4[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product4[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product4[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 5

pr5_predictions[, residuals:=sold_count-fitted]
product5$residuals=pr5_predictions$residuals
product5$arima=pr5_predictions$fitted

pr5_lm = lm(residuals~basket_count, data=product5[event_date<"2021-05-22"])
pr5_lm
## 
## Call:
## lm(formula = residuals ~ basket_count, data = product5[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##  (Intercept)  basket_count  
##     -0.20615       0.04331
product5 = cbind(product5, res_pred=predict(pr5_lm, new_data=product5))
product5[, fitted:= res_pred+arima]

ggplot(data=product5[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product5[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product5[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 6

pr6_predictions[, residuals:=sold_count-fitted]
product6$residuals=pr6_predictions$residuals
product6$arima=pr6_predictions$fitted

pr6_lm = lm(residuals~basket_count + price, data=product6[event_date<"2021-05-22"])
pr6_lm
## 
## Call:
## lm(formula = residuals ~ basket_count + price, data = product6[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##  (Intercept)  basket_count         price  
##   -100.38798       0.04621       0.19426
product6 = cbind(product6, res_pred=predict(pr6_lm, new_data=product6))
product6[, fitted:= res_pred+arima]

ggplot(data=product6[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product6[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product6[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 7

pr7_predictions[, residuals:=sold_count-fitted]
product7$residuals=pr7_predictions$residuals
product7$arima=pr7_predictions$fitted

pr7_lm = lm(residuals~basket_count + category_sold, data=product7[event_date<"2021-05-22"])
pr7_lm
## 
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product7[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##   (Intercept)   basket_count  category_sold  
##      -4.31413       -0.03833        0.05830
product7 = cbind(product7, res_pred=predict(pr7_lm, new_data=product7))
product7[, fitted:= res_pred+arima]

ggplot(data=product7[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product7[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product7[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 8

pr8_predictions[, residuals:=sold_count-fitted]
product8$residuals=pr8_predictions$residuals
product8$arima=pr8_predictions$fitted

pr8_lm = lm(residuals~basket_count + category_sold+visit_count, data=product8[event_date<"2021-05-22"])
pr8_lm
## 
## Call:
## lm(formula = residuals ~ basket_count + category_sold + visit_count, 
##     data = product8[event_date < "2021-05-22"])
## 
## Coefficients:
##   (Intercept)   basket_count  category_sold    visit_count  
##     0.3212686      0.0045766     -0.0007774      0.0001670
product8 = cbind(product8, res_pred=predict(pr8_lm, new_data=product8))
product8[, fitted:= res_pred+arima]

ggplot(data=product8[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product8[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product8[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))

Product 9

pr9_predictions[, residuals:=sold_count-fitted]
product9$residuals=pr9_predictions$residuals
product9$arima=pr9_predictions$fitted

pr9_lm = lm(residuals~basket_count, data=product9[event_date<"2021-05-22"])
pr9_lm
## 
## Call:
## lm(formula = residuals ~ basket_count, data = product9[event_date < 
##     "2021-05-22"])
## 
## Coefficients:
##  (Intercept)  basket_count  
##     -5.77632       0.01562
product9 = cbind(product9, res_pred=predict(pr9_lm, new_data=product9))
product9[, fitted:= res_pred+arima]

ggplot(data=product9[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product9[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product9[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))